/*-----------------------------------------------*
Name: Haqdarshak $pilot Intervention/Endline-Cleaning
Date Created: 30 July, 2018
Date Last Modified: 28 April, 2021
Created by: Aaron Berman and modified by Daniela Paz and Saumya Mathur on Nov.13/2020  
Description: Clean raw data to create an intermediate endline takeup file
*-------------------------------------------------*/


*..................................................
** Install user-written commands from SSC when they are not already available.
** -which- sets return code 111 when it cannot find a command; only then install.
foreach pkg in mdesc nmissing veracrypt {
	capture which `pkg'
	if _rc == 111 {
		ssc install `pkg'
	}
}
*..................................................
** Session setup
clear all
// do not stop for --more-- prompts
set more off
// interpret the commands below under Stata 12 rules
version 12.0
// close any log left open by an earlier run; ignore the error when none is open
capture log close
// with pause off, the -pause- commands further down do nothing
pause off



//open log 
*cd "$logs"
*log using "clean_endlinetakeup.smcl", replace



//locals for different stats/regressions (screenings vs. schemes):
//each local switches one section of this do-file on (1) or off (0)
local schemecounts = 0
local schemeregs = `schemecounts'
local screeningcounts = 1
local screeningregs = `screeningcounts'
local eligcounts = 1
local trackingcounts = 0
local householdroster = 0
local hh_eligibility = 0
local endline_takeup = 1

local regressions = 1
//the regressions section merges in the hh_elig_dummies tempfile and appends the
//control_hhs tempfile; those are built only by the hh_eligibility and
//endline_takeup sections, so both must run whenever regressions is on
if `regressions' == 1 {
	local hh_eligibility = 1
	local endline_takeup = 1
}



//Build household-level eligibility dummies for the top schemes in Rajasthan.
//Reads:  $pilot/hh_data_treatment_b and $pilot/individual_data_treatment_b
//Output: tempfile hh_elig_dummies, one row per household_id (stored as a string)
if `hh_eligibility' == 1 {

	** household attributes used by the eligibility rules
	** (no preserve/restore needed: the individual file is loaded right after)
	use "$pilot/hh_data_treatment_b", clear 
	keep household_id annual_income housekind_kuccha housekindobserve_kuccha enter_annual_income
	tempfile hh_attributes
	save "`hh_attributes'", replace

	use "$pilot/individual_data_treatment_b", clear 


	** merge with household attributes; every individual must match a household
	merge m:1 household_id using "`hh_attributes'"
	assert _merge == 3
	drop _merge

	** drop if missing both age and gender
	drop if missing(enter_years_) & missing(member_gender_)


	** 3g age
	rename enter_years_ age
	label var age "Age"

	** check age of first respondent (must be a non-missing adult age)
	tab age if member_num == "1", m
	assert age >= 18 & !missing(age) if member_num == "1"
	sum age if member_num == "1", d

	** 3h gender (1 = male, 2 = female; any other value stays missing)
	gen male = 1 if member_gender_ == 1
	replace male = 0 if member_gender_ == 2
	tab male, m
	label var male "Male"

	** 3i literacy (note: a missing read_write_ is coded 0 here)
	gen literate = read_write_ == 1
	tab literate, m
	label var literate "Can read or write"

	** 3k enrolled in school
	rename enrolled_in_school_ enrolled_in_school
	tab enrolled_in_school, m
	label var enrolled_in_school "Currently enrolled in school or training"

	** 3l marital status
	rename member_marital_status_ marital_status

	** 3m occupation
	rename occupation_ occupation

	** 3n income in last month

	** 3o hours worked last week

	** 3p have documents

	** 3q bank account

	** 3r disabled
	rename disabled_ disabled
	tab disabled, m
	label var disabled "Disabled"

	** 3s disability percentage

	** 3t adopted orphan
	rename adopted_orphan_ adopted_orphan

	** 3u mother separated/divorced/widowed/remarried widow
	rename mother_widowed_ mother_widowed

	** 3v mother or father disabled
	rename mother_father_disabled_ mother_father_disabled


	****************
	** code up eligibilities for top 8 schemes in Rajasthan
	** (individual-level 0/1 flags; aggregated to the household further down)
	****************

	** make sure not missing key variables
	assert !missing(male)
	*assert !missing(age)

	** 1 girls aged 10 or younger
	gen elig_sukanya = 0
	replace elig_sukanya = 1 if age <= 10 & !missing(age) & male == 0
	tab elig_sukanya, m

	** 2 everyone
	gen elig_postoffice = 0
	replace elig_postoffice = 1 // if age >= 18 (Flavvy said this no longer applies)
	tab elig_postoffice, m

	** 3 ages 18-60 in occupation codes 26-37 (a missing age fails the age <= 60 test)
	gen elig_laborcard = 0
	replace elig_laborcard = 1 if age >= 18 & age <= 60 & inlist(occupation, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)
	tab elig_laborcard, m

	** 4 adults in a household with housekind_kuccha == 1
	gen elig_awas_yojana = 0 // not done
	replace elig_awas_yojana = 1 if age >=18 & !missing(age) & housekind_kuccha == 1 // & housekind_kuccha housekindobserve_kuccha == 1
	tab elig_awas_yojana, m

	** 5 men 58+ / women 55+ with income below 48000
	** (a missing income compares as larger than 48000, so it is never eligible)
	gen elig_oldage_pension = 0 
	replace elig_oldage_pension = 1 if age >= 58 & !missing(age) & male == 1 & enter_annual_income < 48000
	replace elig_oldage_pension = 1 if age >= 55 & !missing(age) & male == 0 & enter_annual_income < 48000
	tab elig_oldage_pension, m

	** 6
	gen elig_palanhaar = . // this was too sensitive, so we did not ask these questions
	tab elig_palanhaar, m

	** 7 adult women with marital status 3-6 and income below 48000
	gen elig_widow_pension = 0
	replace elig_widow_pension = 1 if age >= 18 & !missing(age) & male == 0 & enter_annual_income < 48000 & inlist(marital_status,3,4,5,6)
	tab elig_widow_pension, m

	** 8 ages 18-40
	gen elig_atalpension = 0
	replace elig_atalpension = 1 if age >=18 & age <=40 & !missing(age)
	tab elig_atalpension, m

	pause
	//Generate household-level eligibility variables 
	collapse (sum) elig_*, by(household_id)
	//collapse (sum) returns 0 for a column that is missing everywhere, which would
	//turn "not asked" into "nobody eligible"; put the missing code back
	replace elig_palanhaar = .
	//drop labor card variable since not one of top schemes 
	drop elig_laborcard

	//turn member counts into household dummies (1 = at least one eligible member)
	foreach x of varlist elig_* {
		replace `x' = 1 if `x' != 0 & !missing(`x')
	}


	//tempfile; household_id is stored as a string to match the other household files
	tostring household_id, replace
	tempfile hh_elig_dummies
	save `hh_elig_dummies', replace


}





//Build the control-household file (tempfile control_hhs) from the baseline survey;
//it is appended to the treated households in the regressions section
if `endline_takeup' == 1 {

	//load baseline control households only
	use if treatment == 0 using "$pilot/hh_data_treatment_b", clear

	//baseline interview date: the day is the first two characters of starttime
	gen baseline_start_day = substr(starttime, 1, 2)
	destring baseline_start_day, replace 

	//the three-letter month starts at character 4 for two-digit days, else at character 3
	gen start_month_string = substr(starttime, cond(baseline_start_day > 9, 4, 3), 3)

	//only February and March are coded; any other month is left missing
	gen baseline_start_month = cond(start_month_string == "Feb", 2, cond(start_month_string == "Mar", 3, .))

	//keep the baseline covariates used later and flag these rows as controls
	keep household_id enter_annual_income above_48000 caste* ///
		applied_for_govt_scheme administer_module10 village_id stratum ///
		baseline_start_day baseline_start_month ///
		satisfied_grampanchayat difficulty_in_application
	gen control_hh = 1

	tempfile control_hhs
	save `control_hhs', replace 

}








*Scheme and Attitude Regressions
//Assembles the household-level endline takeup file:
//  1. flags households where a disabled member was screened (tracking sheets)
//  2. counts members/children/disabled per household (baseline roster)
//  3. pulls village/stratum and other baseline characteristics
//  4. merges these onto the screening counts and the eligibility dummies
//  5. appends baseline control households and merges in the endline survey
//  6. codes scheme outcomes and saves $intermediate_data/endline_takeup_clean
//Needs the tempfiles hh_elig_dummies and control_hhs built by the sections above.
if `regressions' == 1 {
	
	//use tracking sheets and merge in household roster data to determine who was disabled 
	cd "$pilot"
	use "pilot_intervention_tracking_sheets_all", clear 

	//merge by member id 
	merge m:1 member_id using "pilot_baseline_household_roster_all.dta"
	keep if _merge == 3 
	drop _merge 

	//recode the household ID before collapsing, so the household still ends up as a
	//single row if both IDs occur (recoding afterwards would create a duplicate and
	//break the 1:1 merge below)
	//NOTE(review): presumably corrects a mis-keyed ID in the tracking sheets - confirm
	replace household_id = 810125 if household_id == 401185

	//indicate which households had disabled people screened 
	collapse (sum) num_disabled_screened=disabled, by(household_id) 
	gen hh_disabled_screened = (num_disabled_screened > 0 & !missing(num_disabled_screened))
	drop num_disabled_screened

	tostring household_id, replace 
	tempfile hh_disabled 
	save `hh_disabled', replace 

	//Use household roster data (working directory is still $pilot)
	use "pilot_baseline_household_roster_all", clear 

	//count number of HH members/children 
	collapse (count) num_members=member_id (sum) hh_num_child=child hh_num_disabled=disabled, by(household_id)   //change to member_id instead of unique member id
	tostring household_id, replace

	tempfile household_member_counts
	save `household_member_counts', replace 

	//get village & stratum designations 
	use "$pilot/hh_data_treatment_b", clear 
	preserve 
	keep household_id village_id stratum administer_module10
	tostring household_id, replace
	tempfile village_strata
	save `village_strata', replace 
	restore 

	//create other relevant household-level variables
	//disability: total across the disabled_* member columns
	egen num_disabled = rowtotal(disabled_*)
	gen any_disability_hh = (num_disabled > 0) if !missing(num_disabled)

	//1 if member 1 is male; a missing gender is coded 0
	gen member_male_1 = (member_gender_1 == 1)

	//economic disruptions variable
	egen num_disruptions = rowtotal(E_death_head_of_hh-E_dropinincome_cropfailure)

	//generate date: day = first two characters of starttime; the three-letter month
	//starts at character 4 for two-digit days and at character 3 for one-digit days
	gen baseline_start_day = substr(starttime, 1, 2)
	destring baseline_start_day, replace 
	gen start_month_string = substr(starttime, 4, 3) if baseline_start_day > 9 
	replace start_month_string = substr(starttime, 3, 3) if baseline_start_day <= 9
	//only February and March are coded; other months stay missing
	gen baseline_start_month = 2 if start_month_string == "Feb"
	replace baseline_start_month = 3 if start_month_string == "Mar"

	keep household_id any_disability_hh enter_annual_income above_48000 caste* applied_for_govt_scheme member_male_1 num_disruptions administer_module10 baseline_start_day baseline_start_month satisfied_grampanchayat difficulty_in_application
	tostring household_id, replace
	tempfile baseline_characteristics
	save `baseline_characteristics', replace 


	//use household-level data from previous section 
	use "$pilot/pilot_intervention_household_screening_counts", clear 

	tostring household_id, replace
	merge 1:1 household_id using `household_member_counts' 
		//_merge == 1 means erroneous or missing HHID
		//_merge == 2 means baseline HH has not yet been screened 
	keep if _merge == 3
	drop _merge
	duplicates report household_id     //CHECKING
	merge 1:1 household_id using `baseline_characteristics'
		//_merge == 1 means erroneous or missing HHID
		//_merge == 2 means baseline HH has not yet been screened
	keep if _merge == 3
	drop _merge	
	duplicates report household_id     //CHECKING
	merge 1:1 household_id using `village_strata'
		//_merge == 1 means erroneous or missing HHID 
		//_merge == 2 means baseline HH has not yet been screened 
	keep if _merge == 3
	drop _merge 
	duplicates report household_id     //CHECKING
	merge 1:1 household_id using `hh_disabled'
		//_merge == 1 means HHID not in tracking sheets 
		//_merge == 2 indicates discrepancy between app and tracking sheets
	replace hh_disabled_screened = 0 if _merge == 1
	//rows found only in the tracking sheets have no screening or baseline data;
	//drop them so they cannot reach the final file as empty households
	drop if _merge == 2
	drop _merge 
	duplicates report household_id     //CHECKING

	merge 1:1 household_id using `hh_elig_dummies'
	keep if _merge == 3
	drop _merge 
	duplicates report household_id     //CHECKING

	//generate treatment groups (a missing discount gives 0 on all three)
	gen treat_100 = (discount == 100)
	gen treat_50 = (discount == 50)
	gen treat_0 = (discount == 0)

	*******************************************************************************
	***APPEND CONTROL HOUSEHOLDS FROM BASELINE DATA 
	*******************************************************************************
	//back to a numeric ID before appending the control households
	destring household_id, replace 
	append using `control_hhs'
	
	//check that no household appears twice after the append
	duplicates report household_id
	duplicates tag household_id, gen(tag)
	tab tag
	drop tag

	merge 1:1 household_id using "$pilot/endline_takeup"
	tab _merge 
	// NOT ALL HOUSEHOLDS MERGE SUCCESSFULLY-ACCORDING TO FIELD REPORTS_MERGE==1 ARE HOUSEHOLDS THAT REFUSED TO ANSWER OR SHIFTED
	pause
	qui count if _merge == 1 //count those who are missing endline observations  
	if r(N) != 0 {
		//inspection hook: with pause on, stops on the unmatched households only
		preserve
		keep if _merge == 1
		drop _merge 
		pause 
		restore
	}

	keep if _merge == 3
	drop _merge 


	//generate outcomes for each endline scheme 
	//whole months between baseline and endline: 12 plus the month difference,
	//less one when the endline day of month falls before the baseline day
	gen months_bl_endline = 12 + (endline_start_month - baseline_start_month) if endline_start_day >= baseline_start_day
	replace  months_bl_endline = 11 + (endline_start_month - baseline_start_month) if endline_start_day < baseline_start_day

	foreach x in "sukanya" "palanhaar" "ujjwalla" "oldagepension" "widowpension" "awas" "atalpension" {
		//received since baseline
		//NOTE(review): _when codes 1/2/3 look like days/months/years - confirm against the survey
		gen recd_`x'_sincebl = 0
		replace recd_`x'_sincebl = 1 if firstreceived_`x'_when == 1 //number of days since received 
		replace recd_`x'_sincebl = 1 if firstreceived_`x'_when == 2 & firstreceived_`x'_mo <= months_bl_endline
		replace recd_`x'_sincebl = 1 if firstreceived_`x'_when == 3 & firstreceived_`x'_y == 1

		//missing -> 0, code 2 -> 1, code 999 -> missing
		replace applied_`x' = 0 if missing(applied_`x') 
		replace applied_`x' = 1 if applied_`x' == 2
		replace applied_`x' = . if applied_`x' == 999

		replace receiving_`x' = 0 if missing(receiving_`x')
		replace receiving_`x' = 1 if receiving_`x' == 2

		replace heardof_`x' = 0 if missing(heardof_`x')
		replace heardof_`x' = . if heardof_`x' == 999

		//useful = rating 1 or 2; ratings of 998 and above (and missing) stay missing
		gen `x'_isuseful = (rateusefulness_`x' == 1 | rateusefulness_`x' == 2) if rateusefulness_`x' < 998
	}


	//need to do post office savings separately due to variable naming issue 
	//(v75 and v76 stand in for the months and years variables used in the loop)
	gen recd_postofficesavings_sincebl = 0
	replace recd_postofficesavings_sincebl = 1 if firstreceived_postofficesavings_ == 1
	replace recd_postofficesavings_sincebl = 1 if firstreceived_postofficesavings_ == 2 & v75 <= months_bl_endline
	replace recd_postofficesavings_sincebl = 1 if firstreceived_postofficesavings_ == 3 & v76 == 1

	//same recodes as the loop above, including 999 -> missing
	//NOTE(review): assumes 999 carries the same meaning for post office savings - confirm
	replace applied_postofficesavings = 0 if missing(applied_postofficesavings)
	replace applied_postofficesavings = 1 if applied_postofficesavings == 2
	replace applied_postofficesavings = . if applied_postofficesavings == 999

	replace receiving_postofficesavings = 0 if missing(receiving_postofficesavings)
	replace receiving_postofficesavings = 1 if receiving_postofficesavings == 2

	replace heardof_postofficesavings = 0 if missing(heardof_postofficesavings)
	replace heardof_postofficesavings = . if heardof_postofficesavings == 999

	gen postofficesavings_isuseful = (rateusefulness_postofficesavings == 1 | rateusefulness_postofficesavings == 2) if rateusefulness_postofficesavings < 998

	
	save "$intermediate_data/endline_takeup_clean", replace
	
}


